﻿/*  
cd /projects/data_commons/lbd_code/
qsas lbd_2_runif.sas 5 &
*/

libname llbd "/projects/data_commons/lbd/";

/* 
================================================================================
%include "/projects/data_commons/lbd_code/lbd_0_m_lbd_basic.sas" /source2;
As of 2020-12-29 (adapted from lbd_1_raw.sas). 
================================================================================
*/

%macro m_lbd_basic(pdt_in= , pdt_out= , p_exna= );

/*
  Basic cleaning of an LBD extract.
    pdt_in  : data set to read
    pdt_out : data set to write (re-read and rewritten by the later steps)
    p_exna  : when equal to 1, rows with a missing firmnum, a missing or
              zero worker, or a missing year are deleted
*/

/* Step 1: build the numeric codes and year, then discard the source columns */
data &pdt_out.;
  set &pdt_in.;

  /* substr returns text; the multiplication by 1 converts it to a number */
  naics    = substr(refnaics, 1, 6) * 1;
  sic      = substr(refsic, 1, 4) * 1;
  year     = fullyr;
  firmnum3 = firmnum * 1;

  /* Disabled flag for a missing firm number:
       nofirmnum = 0;
       if firmnum3 = . then nofirmnum = 1;
  */

  /* REDACTED
  Code updating firmnum has been redacted */

  st2  = st * 1;
  cty2 = cty * 1;
  /* st2 fills the thousands and above, cty2 the last three digits
     (presumably state and county codes; confirm against the LBD layout) */
  fips = st2 * 1000 + cty2;

  drop fullyr firmnum refnaics refsic fyear lyear st2 cty2 st cty;
run;

/* Step 2: put the converted firm number back under the name firmnum */
data &pdt_out.;
  set &pdt_out.;
  firmnum = firmnum3;
  drop firmnum3;
run;

/* Step 3 (optional): remove rows that lack the key values */
%if "&p_exna." = "1" %then %do;

%put Exclude NAs.;

data &pdt_out.;
  set &pdt_out.;
  if firmnum = . or worker = . or worker = 0 or year = . then delete;
run;

%end;

%mend m_lbd_basic;

/* 
================================================================================
Assign uniform random numbers (runif_1, runif_2) to every year-lbdid pair
================================================================================
*/

/* Working copy of the raw LBD with year copied from fullyr */
data lbd_raw;
  set llbd.lbd_raw_orig;
  year = fullyr;
run;

/* 
-----------------------------------------------------
Check if lbdid is unique in each year 
-----------------------------------------------------
*/

/* The BY statements below require year-lbdid order. Sort a copy that holds
   only the two key variables instead of assuming the raw file is ordered. */
proc sort data=lbd_raw (keep=year lbdid) out=lbd_check;
  by year lbdid;
run;

/* One counter per record, summed per key below */
data lbd_check;
  set lbd_check;
  est = 1;
run;

/* Number of records for each year-lbdid pair */
proc means data=lbd_check noprint;
  by year lbdid;
  var est;
  output out=lbd_check sum(est)=est;
run;

/* Largest per-pair count within each year */
proc means data=lbd_check noprint;
  by year;
  var est;
  output out=lbd_check max(est)=est;
run;

/* A year printed here has at least one lbdid that occurs more than once */
proc print data=lbd_check;
  where est ~= 1;
run;

/* 
-----------------------------------------------------
Generate random numbers for observations kept by m_lbd_basic (p_exna=1)
-----------------------------------------------------
*/

/* Clean the raw data with NA exclusion switched on; the result is the kept sample */
%m_lbd_basic(pdt_in=lbd_raw, pdt_out=lbd, p_exna=1);

/* Only the two key variables are needed from here on */
data lbd;
  set lbd (keep=year lbdid);
run;

/* Fix the row order before drawing, since the draws are assigned row by row */
proc sort data=lbd;
  by year lbdid;
run;

/* First draw. Each draw has its own DATA step and therefore its own seed. */
data lbd;
  set lbd;
  /* seeding on the first row only: later STREAMINIT calls in a step are ignored */
  if _n_ = 1 then call streaminit(60637);
  runif_1 = rand("Uniform");
run;

/* Second draw, seeded separately */
data lbd;
  set lbd;
  if _n_ = 1 then call streaminit(5807);
  runif_2 = rand("Uniform");
run;

/* 
-----------------------------------------------------
Generate random numbers for observations dropped by m_lbd_basic (p_exna=1)
-----------------------------------------------------
*/

/* Both inputs must be in year-lbdid order for the match-merge */
proc sort data=lbd;
  by year lbdid;
run;

proc sort data=lbd_raw;
  by year lbdid;
run;

/* Raw year-lbdid rows that have no counterpart in the kept sample */
data lbd_add;
  merge lbd (in=in_kept) lbd_raw;
  by year lbdid;
  if not in_kept;
  keep year lbdid;
run;

/* First draw for the dropped rows, with its own seed */
data lbd_add;
  set lbd_add;
  /* seeding on the first row only: later STREAMINIT calls in a step are ignored */
  if _n_ = 1 then call streaminit(5757);
  runif_1 = rand("Uniform");
run;

/* Second draw for the dropped rows, seeded separately */
data lbd_add;
  set lbd_add;
  if _n_ = 1 then call streaminit(1126);
  runif_2 = rand("Uniform");
run;

/* 
-----------------------------------------------------
Export year-lbdid-runif
-----------------------------------------------------
*/

/* Stack the kept sample on top of the dropped remainder */
data lbd;
  set lbd
      lbd_add;
run;

/* Restore year-lbdid order across the combined rows */
proc sort data=lbd;
  by year lbdid;
run;

/* Write the result to the permanent library */
data llbd.lbd_runif;
  set lbd;
run;

/* End of sas file */
